{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 6 Pandas的函数应用"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -1.207255 -0.228082 -0.645468 -1.737784\n",
      "1 -2.007319 -1.773175 -0.611317 -0.810143\n",
      "2 -2.690278 -0.267494  0.235944 -1.817938\n",
      "3 -2.671815 -1.047553  0.153013  0.058026\n",
      "4 -0.205010  0.656965 -0.813280 -2.017485\n",
      "          0         1         2         3\n",
      "0  1.207255  0.228082  0.645468  1.737784\n",
      "1  2.007319  1.773175  0.611317  0.810143\n",
      "2  2.690278  0.267494  0.235944  1.817938\n",
      "3  2.671815  1.047553  0.153013  0.058026\n",
      "4  0.205010  0.656965  0.813280  2.017485\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1)\n",
    "print(df)\n",
    "\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:47:22.480817800Z",
     "start_time": "2024-07-06T02:47:21.790849600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    1.087995\n",
      "1    0.434726\n",
      "2    0.711669\n",
      "3    0.329257\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0\n",
    "print(df.apply(lambda x : x.max(), axis=0))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:48:09.840371500Z",
     "start_time": "2024-07-05T08:48:09.833871100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.351662\n",
      "1    0.329257\n",
      "2   -0.422633\n",
      "3    1.087995\n",
      "4    0.050864\n",
      "dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:49:32.563401900Z",
     "start_time": "2024-07-05T08:49:32.555516900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "'1.23'"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "'%.2f' % 1.234"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-05T08:51:17.351803100Z",
     "start_time": "2024-07-05T08:51:17.327349100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "data": {
      "text/plain": "      0     1     2     3\n0 -2.71 -1.13 -1.29 -0.35\n1 -1.12 -0.73 -1.14  0.33\n2 -0.99 -0.42 -1.19 -1.37\n3  1.09  0.43  0.71 -1.29\n4 -2.11 -0.45  0.05 -0.74",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n      <th>3</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-2.71</td>\n      <td>-1.13</td>\n      <td>-1.29</td>\n      <td>-0.35</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>-1.12</td>\n      <td>-0.73</td>\n      <td>-1.14</td>\n      <td>0.33</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>-0.99</td>\n      <td>-0.42</td>\n      <td>-1.19</td>\n      <td>-1.37</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1.09</td>\n      <td>0.43</td>\n      <td>0.71</td>\n      <td>-1.29</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>-2.11</td>\n      <td>-0.45</td>\n      <td>0.05</td>\n      <td>-0.74</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 使用applymap应用到每个数据\n",
    "df2=df.map(lambda x : float('%.2f' % x))\n",
    "df2"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-05T08:54:11.456758400Z",
     "start_time": "2024-07-05T08:54:11.444446100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "data": {
      "text/plain": "str"
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-04-11T03:27:27.258699100Z",
     "start_time": "2024-04-11T03:27:27.224712Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 6.4 索引排序"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[2 1 2 2 4]\n",
      "0    10\n",
      "2    11\n",
      "3    12\n",
      "2    13\n",
      "0    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    10\n",
      "0    14\n",
      "2    11\n",
      "2    13\n",
      "3    12\n",
      "dtype: int64\n",
      "0    10\n",
      "2    11\n",
      "3    12\n",
      "2    13\n",
      "0    14\n",
      "dtype: int64\n",
      "0    10\n",
      "2    11\n",
      "3    12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": "0    10\n2    11\n3    12\ndtype: int64"
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) #索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])\n",
    "s4[0:3]  #默认用的位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:47:47.746314900Z",
     "start_time": "2024-07-06T02:47:47.710076100Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# s4.loc[1:2] #loc索引值唯一时可以切片"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          4         2         1         4         3\n",
      "0  0.290356 -0.928574  1.326263 -0.376125 -0.222534\n",
      "1  0.664712  1.878370  0.922800  1.281062 -0.347827\n",
      "3  0.441859  1.227592  0.405797  2.634434  0.469048\n",
      "4  0.239610 -0.653958 -0.239987 -1.672604  0.447063\n",
      "3  1.857909 -0.772407  2.903085 -0.467319 -0.025959\n",
      "          4         2         1         4         3\n",
      "4  0.239610 -0.653958 -0.239987 -1.672604  0.447063\n",
      "3  0.441859  1.227592  0.405797  2.634434  0.469048\n",
      "3  1.857909 -0.772407  2.903085 -0.467319 -0.025959\n",
      "1  0.664712  1.878370  0.922800  1.281062 -0.347827\n",
      "0  0.290356 -0.928574  1.326263 -0.376125 -0.222534\n"
     ]
    }
   ],
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "#轴零是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:48:48.800605200Z",
     "start_time": "2024-07-06T02:48:48.788106400Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1         2         3         4         4\n",
      "0  1.326263 -0.928574 -0.222534  0.290356 -0.376125\n",
      "1  0.922800  1.878370 -0.347827  0.664712  1.281062\n",
      "3  0.405797  1.227592  0.469048  0.441859  2.634434\n",
      "4 -0.239987 -0.653958  0.447063  0.239610 -1.672604\n",
      "3  2.903085 -0.772407 -0.025959  1.857909 -0.467319\n"
     ]
    }
   ],
   "source": [
    "#轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T02:49:28.297057500Z",
     "start_time": "2024-07-06T02:49:28.289445300Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 6.5 按值排序"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[36  0 74 84]\n",
      " [46 85 33 63]\n",
      " [93 48 13 35]\n",
      " [47 25 98 67]\n",
      " [71 95 79 30]\n",
      " [67 43 93 77]]\n",
      "--------------------------------------------------\n",
      "    0   1   2   3\n",
      "0  36   0  74  84\n",
      "1  46  85  33  63\n",
      "2  93  48  13  35\n",
      "3  47  25  98  67\n",
      "4  71  95  79  30\n",
      "5  67  43  93  77\n",
      "--------------------------------------------------\n",
      "    0   1   2   3\n",
      "0  36   0  74  84\n",
      "5  67  43  93  77\n",
      "3  47  25  98  67\n",
      "1  46  85  33  63\n",
      "2  93  48  13  35\n",
      "4  71  95  79  30\n"
     ]
    }
   ],
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "print(df4.values) #查看数据,ndarray\n",
    "print('-'*50)\n",
    "print(df4)\n",
    "print('-'*50)\n",
    "#按轴零排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:51:52.840934600Z",
     "start_time": "2024-07-06T02:51:52.834243900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    2   3   0   1\n",
      "0  74  84  36   0\n",
      "1  33  63  46  85\n",
      "2  13  35  93  48\n",
      "3  98  67  47  25\n",
      "4  79  30  71  95\n",
      "5  93  77  67  43\n"
     ]
    }
   ],
   "source": [
    "#按轴1排序，by后行索引名，交换的是列,使用场景少\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T02:52:37.371239900Z",
     "start_time": "2024-07-06T02:52:37.363243600Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 6.6 处理缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "处理缺失，要么删，要么填充"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -0.253604  0.295339 -0.564874\n",
      "1  1.000000  2.000000       NaN\n",
      "2       NaN  4.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],\n",
    "                       [np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:55:24.204975600Z",
     "start_time": "2024-07-06T02:55:24.198415500Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "data": {
      "text/plain": "nan"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data.iloc[2,0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T02:55:32.079304700Z",
     "start_time": "2024-07-06T02:55:32.069695600Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n"
     ]
    }
   ],
   "source": [
    "#isnull来判断是否有空的数据\n",
    "print(df_data.isnull())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:55:42.152568Z",
     "start_time": "2024-07-06T02:55:42.147182900Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 删除缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -0.253604  0.295339 -0.564874\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第一列来删除,第一列有空值就删除对应的行\n",
    "print(df_data.dropna())\n",
    "# df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T02:57:24.343670300Z",
     "start_time": "2024-07-06T02:57:24.314428200Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [
    {
     "data": {
      "text/plain": "          0         1         2\n0 -0.253604  0.295339 -0.564874\n1  1.000000  2.000000       NaN\n2       NaN  4.000000       NaN\n3  1.000000  2.000000  3.000000",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-0.253604</td>\n      <td>0.295339</td>\n      <td>-0.564874</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>NaN</td>\n      <td>4.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>3.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T02:59:28.232992Z",
     "start_time": "2024-07-06T02:59:28.220981800Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1\n",
      "0  0.295339\n",
      "1  2.000000\n",
      "2  4.000000\n",
      "3  2.000000\n"
     ]
    }
   ],
   "source": [
    "#某个特征缺失太多时，才删除\n",
    "print(df_data.dropna(axis=1))  #某列由nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:00:01.412973200Z",
     "start_time": "2024-07-06T03:00:01.400418900Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [
    {
     "data": {
      "text/plain": "          0         1         2\n0 -0.253604  0.295339 -0.564874\n1  1.000000  2.000000       NaN\n2       NaN  4.000000       NaN\n3  1.000000  2.000000  3.000000",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-0.253604</td>\n      <td>0.295339</td>\n      <td>-0.564874</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>NaN</td>\n      <td>4.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>3.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T03:02:48.152580700Z",
     "start_time": "2024-07-06T03:02:48.143713Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 填充缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     -0.253604\n",
      "1      1.000000\n",
      "2   -100.000000\n",
      "3      1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": "          0         1         2\n0 -0.253604  0.295339 -0.564874\n1  1.000000  2.000000       NaN\n2       NaN  4.000000       NaN\n3  1.000000  2.000000  3.000000",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>0</th>\n      <th>1</th>\n      <th>2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>-0.253604</td>\n      <td>0.295339</td>\n      <td>-0.564874</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>NaN</td>\n      <td>4.000000</td>\n      <td>NaN</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1.000000</td>\n      <td>2.000000</td>\n      <td>3.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#给零列的空值填为-100，按特征（按列）去填充。（一般填均值，中位数，众数）\n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:03:30.290880200Z",
     "start_time": "2024-07-06T03:03:30.248488Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [
    {
     "data": {
      "text/plain": "RangeIndex(start=0, stop=3, step=1)"
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_data.columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-07-06T03:04:30.422855100Z",
     "start_time": "2024-07-06T03:04:30.409468300Z"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.253604\n",
      "1    1.000000\n",
      "2         NaN\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n",
      "0    0.295339\n",
      "1    2.000000\n",
      "2    4.000000\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0   -0.564874\n",
      "1         NaN\n",
      "2         NaN\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "#依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2024-07-06T03:04:16.459456500Z",
     "start_time": "2024-07-06T03:04:16.451966Z"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
